from collections import Counter
from itertools import chain

import jieba
import pandas as pd

# Load the tab-separated training set; only its 'sentence' column is used below.
train_data = pd.read_csv('./train.tsv', sep='\t')


# pandas parses empty cells (and its default NA markers) as float NaN, which
# jieba cannot segment, so missing sentences are dropped up front. astype(str)
# additionally guards against cells that pandas parsed as numbers.
sentences = train_data['sentence'].dropna().astype(str)

# Segment every sentence and flatten the per-sentence token lists into one
# list. chain.from_iterable consumes the segmentations lazily instead of
# unpacking all of them into a single call's argument tuple.
# NOTE: despite its name, set1 is a list that keeps duplicates -- those
# duplicates are exactly what Counter tallies on the next line.
set1 = list(chain.from_iterable(map(jieba.lcut, sentences)))
word_dict = Counter(set1)

# most_common() with no argument returns every (token, count) pair ordered
# from the most frequent to the least; ties keep first-seen order.
sorted_word_counts = word_dict.most_common()
for key, value in sorted_word_counts:
    print(key, value)
